* Merge the additional data on institutional investors

clear all

* Set your directory path here
local root "D:\Understanding_Bank_Payouts"


// Clean the Bushee data again?
if 1 == 1 {

	import excel "`root'\Data\Bushee institutional ownership\iiclassdataBrianBushee1981to2018.xlsx", sheet("iiclassdataBrianBushee1981to201")

	rename A mgrno
	rename B mgrno_version
	rename C mgrno_permkey
	rename D year 
	rename F inst_class_raw
	rename G inst_class_perm
	drop E H I J K L M

	label variable inst_class_raw "Inst. inv. type from Bushee (2001), raw"
	label variable inst_class_perm "Inst. inv. type from Bushee (2001), permanent"

	dups mgrno year, t drop
	drop _expand

	save "Bushee_classification_cleaned.dta", replace

	clear

	}



// Get data on the number of shares from CRSP again?
if 1 == 1 {

	forvalues yr = 1990/2018 {

		use "`root'\Data\CRSP\CRSP_`yr'.dta", clear

		generate year = year(date)
		sort ncusip year date
		bys ncusip year (date): keep if _n == _N 			// Nr of shares at the end of the year
		keep ncusip shrout year
		rename ncusip cusip
		sort cusip
		drop if cusip == ""

		save "TEMP_CRSP_shrout_data_`yr'.dta", replace

		}

	use  "TEMP_CRSP_shrout_data_1990.dta", clear

	forvalues yr = 1991/2018 {

		append using "TEMP_CRSP_shrout_data_`yr'.dta"
		erase "TEMP_CRSP_shrout_data_`yr'.dta"
		}

	save "TEMP_CRSP_shrout_data_all.dta", replace

	}





// Clean the holdings summary data again?
if 1 == 1 {

	use "`root'\Data\TR_institutional\Holdings_summary_1980to2021.dta" 
	dups cusip rdate, t
	generate year = year(rdate)
	generate month = month(rdate)
	tab month
	keep if month == 12
	drop month rdate
	dups cusip year, t

	save "Holdings_summary_yearly.dta", replace

	clear

	}





// Clean the institutional ownership data again?
if 1 == 1 {

	use "`root'\Data\TR_institutional\13F_holdings_1990to2021.dta" 

	drop sole shared no
	rename fdate date
	generate year = year(date)
	generate month = month(date)
	tab month
	keep if month == 12
	drop month date

	merge m:1 mgrno year using "Bushee_classification_cleaned.dta"

	drop if _merge == 2
	drop _merge
	replace inst_class_perm = "UNK" if inst_class_perm == ""
	replace inst_class_perm = "UNK" if inst_class_perm == "."

	sort cusip year inst_class_perm mgrno
	bys cusip year: egen nrshs_inst = total(shares)
	label variable nrshs_inst "Nr of shares held by institutions (TR)"

	gen inst_class_perm_lowercase = lower(inst_class_perm)

	foreach inst in ded qix tra unk {
		generate dummy_`inst' = (inst_class_perm_lowercase == "`inst'")
		generate shs_times_`inst' = dummy_`inst' * shares

		by cusip year: egen nrshs_`inst' = total(shs_times_`inst')
		drop dummy_`inst' shs_times_`inst'

		}

	// De-duplicate the observations
	drop if cusip == ""
	drop mgrno type mgrno_version mgrno_permkey inst_class_raw inst_class_perm inst_class_perm_lowercase shares
	dups, t drop
	drop _expand

	merge 1:1 cusip year using "Holdings_summary_yearly.dta"
	drop _merge

	merge 1:1 cusip year using "TEMP_CRSP_shrout_data_all.dta", update 
	drop _merge

	generate instown_perc = InstOwn_Perc
	generate helper = InstOwn_Perc
	replace instown_perc = InstOwn/ (shrout*1000) if helper == .
	drop helper
	label variable instown_perc "Total Inst. Ownership, Percent of Shares Outstanding"

	foreach inst in ded qix tra unk {
		generate `inst'_own_perc = nrshs_`inst' / (shrout*1000)
		} 


	generate top5inst_own_perc =  Top5InstOwn / (shrout*1000)
	generate top10inst_own_perc =  Top10InstOwn / (shrout*1000)
	generate instblk_own_perc = InstBlockOwn / (shrout*1000)

	generate cusip6 = substr(cusip,1,6)

	rename NumInstOwners nr_inst_owners
	rename NumInstBlockOwners nr_inst_blk_owners
	rename InstOwn_HHI instown_hhi	

	collapse (max) instown_perc ded_own_perc qix_own_perc tra_own_perc unk_own_perc top5inst_own_perc top10inst_own_perc instblk_own_perc ///
	nr_inst_owners nr_inst_blk_owners instown_hhi, by(cusip6 year)

	label variable top5inst_own_perc "Largest 5 institutional ownership size, %"
	label variable top10inst_own_perc "Largest 10 institutional ownership size, %"
	label variable instblk_own_perc "Total ownership by institutional blockholders, %"
	label variable nr_inst_owners "Number of 13-F Institutional Owners"
	label variable nr_inst_blk_owners "Number of >5% Institutional Block Ownerships"
	label variable instown_hhi "Ownership Concentration - Herfindahl-Hirschman Index"

	foreach inst in ded qix tra unk {
		label variable `inst'_own_perc "`inst' Inst. Ownership, Percent of Shares Outstanding"
		} 

	save "Institutional_ownership_types_detailed.dta", replace
	}

timer off 1
timer list

exit, clear
